{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import requests\n",
    "import kuser_agent as kua\n",
    "import lxml.etree as le"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "content = requests.get(\n",
    "    url ='https://www.runoob.com/html/html-tutorial.html',\n",
    "    headers ={\n",
    "        'User-Agent':kua.get()\n",
    "    }\n",
    ").content\n",
    "with open('菜鸟.html','wb') as f:\n",
    "    f.write(content)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "74 74\n"
     ]
    }
   ],
   "source": [
    "contentx = le.HTML(content)\n",
    "names = contentx.xpath('//div[@id=\"leftcolumn\"]/a/text()')\n",
    "hrefs = contentx.xpath('//div[@id=\"leftcolumn\"]/a/@href')\n",
    "print(len(names),len(hrefs))\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['\\n\\t\\t\\tHTML 教程\\t\\t\\t', '\\n\\t\\t\\tHTML 简介\\t\\t\\t', '\\n\\t\\t\\tHTML 编辑器\\t\\t\\t', '\\n\\t\\t\\tHTML 基础\\t\\t\\t', '\\n\\t\\t\\tHTML 元素\\t\\t\\t', '\\n\\t\\t\\tHTML 属性\\t\\t\\t', '\\n\\t\\t\\tHTML 标题\\t\\t\\t', '\\n\\t\\t\\tHTML 段落\\t\\t\\t', '\\n\\t\\t\\tHTML 文本格式化\\t\\t\\t', '\\n\\t\\t\\tHTML 链接\\t\\t\\t', '\\n\\t\\t\\tHTML 头部\\t\\t\\t', '\\n\\t\\t\\tHTML CSS\\t\\t\\t', '\\n\\t\\t\\tHTML 图像\\t\\t\\t', '\\n\\t\\t\\tHTML 表格\\t\\t\\t', '\\n\\t\\t\\tHTML 列表\\t\\t\\t', '\\n\\t\\t\\tHTML 区块\\t\\t\\t', '\\n\\t\\t\\tHTML 布局\\t\\t\\t', '\\n\\t\\t\\tHTML 表单\\t\\t\\t', '\\n\\t\\t\\tHTML 框架\\t\\t\\t', '\\n\\t\\t\\tHTML 颜色\\t\\t\\t', '\\n\\t\\t\\tHTML 颜色名\\t\\t\\t', '\\n\\t\\t\\tHTML 颜色值\\t\\t\\t', '\\n\\t\\t\\tHTML 脚本\\t\\t\\t', '\\n\\t\\t\\tHTML 字符实体\\t\\t\\t', '\\n\\t\\t\\tHTML URL\\t\\t\\t', '\\n\\t\\t\\tHTML 速查列表\\t\\t\\t', 'HTML 标签简写及全称', '\\n\\t\\t\\tHTML 总结\\t\\t\\t', '\\n\\t\\t\\tXHTML 简介\\t\\t\\t', '\\n\\t\\t\\tHTML5 教程\\t\\t\\t', ' HTML5 浏览器支持 ', '\\n\\t\\t\\tHTML5 新元素\\t\\t\\t', '\\n\\t\\t\\tHTML5 Canvas\\t\\t\\t', '\\n\\t\\t\\tHTML5 内联 SVG\\t\\t\\t', ' HTML5 MathML ', '\\n\\t\\t\\tHTML5 拖放\\t\\t\\t', '\\n\\t\\t\\tHTML5 地理定位\\t\\t\\t', '\\n\\t\\t\\tHTML5 Video(视频)\\t\\t\\t', '\\n\\t\\t\\tHTML5 Audio(音频)\\t\\t\\t', '\\n\\t\\t\\tHTML5 Input 类型\\t\\t\\t', '\\n\\t\\t\\tHTML5 表单元素\\t\\t\\t', '\\n\\t\\t\\tHTML5 表单属性\\t\\t\\t', '\\n\\t\\t\\tHTML5 语义元素\\t\\t\\t', '\\n\\t\\t\\tHTML5 Web 存储\\t\\t\\t', ' HTML5 Web SQL ', '\\n\\t\\t\\tHTML5 应用程序缓存\\t\\t\\t', '\\n\\t\\t\\tHTML5 Web Workers\\t\\t\\t', '\\n\\t\\t\\tHTML5 SSE\\t\\t\\t', ' HTML5 WebSocket ', ' HTML5 测验 ', 'HTML(5) 代码规范 ', '\\n\\t\\t\\tHTML 媒体(Media)\\t\\t\\t', '\\n\\t\\t\\tHTML 插件\\t\\t\\t', '\\n\\t\\t\\tHTML 音频(Audio)\\t\\t\\t', '\\n\\t\\t\\tHTML 视频（Video）播放\\t\\t\\t', '\\n\\t\\t\\tHTML 实例\\t\\t\\t', '\\r\\nHTML 标签列表(字母排序)\\t', '\\r\\nHTML 标签列表（功能排序）\\t', '\\r\\nHTML 属性\\t', '\\r\\nHTML 事件\\t', '\\r\\nHTML 画布\\t', '\\r\\nHTML 音频/视频\\t', '\\r\\nHTML 有效DOCTYPES\\t', '\\r\\nHTML 颜色名\\t', '\\r\\nHTML 拾色器\\t', '\\r\\nHTML 字符集\\t', '\\r\\nHTML ASCII\\t', '\\r\\nHTML ISO-8859-1\\t', '\\r\\nHTML 符号\\t', '\\r\\nHTML URL 编码\\t', '\\r\\nHTML 语言代码\\t', '\\r\\nHTTP 消息\\t', '\\r\\nHTTP 方法\\t', '\\r\\n键盘快捷键\\t']\n"
     ]
    }
   ],
   "source": [
    "print(names)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['/html/html-tutorial.html', '/html/html-intro.html', '/html/html-editors.html', '/html/html-basic.html', '/html/html-elements.html', '/html/html-attributes.html', '/html/html-headings.html', '/html/html-paragraphs.html', '/html/html-formatting.html', '/html/html-links.html', '/html/html-head.html', '/html/html-css.html', '/html/html-images.html', '/html/html-tables.html', '/html/html-lists.html', '/html/html-blocks.html', '/html/html-layouts.html', '/html/html-forms.html', '/html/html-iframes.html', '/html/html-colors.html', '/html/html-colornames.html', '/html/html-colorvalues.html', '/html/html-scripts.html', '/html/html-entities.html', '/html/html-url.html', '/html/html-quicklist.html', '/html/html-tag-name.html', '/html/html-summary.html', '/html/html-xhtml.html', '/html/html5-intro.html', 'html5-browsers.html', '/html/html5-new-element.html', '/html/html5-canvas.html', '/html/html5-svg.html', 'html5-mathml.html', '/html/html5-draganddrop.html', '/html/html5-geolocation.html', '/html/html5-video.html', '/html/html5-audio.html', '/html/html5-form-input-types.html', '/html/html5-form-elements.html', '/html/html5-form-attributes.html', '/html/html5-semantic-elements.html', '/html/html5-webstorage.html', 'html5-web-sql.html', '/html/html5-app-cache.html', '/html/html5-webworkers.html', '/html/html5-serversentevents.html', '/html/html5-websocket.html', '/quiz/html5-quiz.html', '/html/html5-syntax.html', '/html/html-media.html', '/html/html-object.html', '/html/html-sounds.html', '/html/html-videos.html', '/html/html-examples.html', '/tags/html-reference.html', '/tags/ref-byfunc.html', '/tags/ref-standardattributes.html', '/tags/ref-eventattributes.html', '/tags/ref-canvas.html', '/tags/ref-av-dom.html', '/tags/html-elementsdoctypes.html', '/tags/html-colorname.html', '/tags/html-colorpicker.html', '/charsets/html-charsets.html', '/tags/html-ascii.html', '/tags/ref-entities.html', '/tags/html-symbols.html', '/tags/html-urlencode.html', '/tags/html-language-codes.html', '/tags/html-httpmessages.html', '/tags/html-httpmethods.html', '/tags/html-keyboardshortcuts.html']\n"
     ]
    }
   ],
   "source": [
    "print(hrefs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 61,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'str'>\n"
     ]
    }
   ],
   "source": [
    "a = '\\n\\t\\t\\tHTML 元素\\t\\t\\t'\n",
    "a.replace('\\n', '').replace('\\t', '') \n",
    "print(type(a))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 81,
   "metadata": {},
   "outputs": [],
   "source": [
    "new_name = [i.replace('\\n', '').replace('\\t', '').replace('\\r', '')  for i in names]\n",
    "new_href = []\n",
    "for i in hrefs:\n",
    "    if i.startswith('/'):\n",
    "        i = 'https://www.runoob.com'+i\n",
    "    else:\n",
    "        i = 'https://www.runoob.com/'+i\n",
    "    new_href.append(i)\n",
    "data = []\n",
    "for i in range (0,74):\n",
    "      data.append(dict(\n",
    "            name = new_name[i],\n",
    "            href = new_href[i]\n",
    "        ))\n",
    "df = pd.DataFrame(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 82,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>href</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>https://www.runoob.com/html/html-tutorial.html</td>\n",
       "      <td>HTML 教程</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>https://www.runoob.com/html/html-intro.html</td>\n",
       "      <td>HTML 简介</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>https://www.runoob.com/html/html-editors.html</td>\n",
       "      <td>HTML 编辑器</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>https://www.runoob.com/html/html-basic.html</td>\n",
       "      <td>HTML 基础</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>https://www.runoob.com/html/html-elements.html</td>\n",
       "      <td>HTML 元素</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>https://www.runoob.com/html/html-attributes.html</td>\n",
       "      <td>HTML 属性</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>https://www.runoob.com/html/html-headings.html</td>\n",
       "      <td>HTML 标题</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>https://www.runoob.com/html/html-paragraphs.html</td>\n",
       "      <td>HTML 段落</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>https://www.runoob.com/html/html-formatting.html</td>\n",
       "      <td>HTML 文本格式化</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>https://www.runoob.com/html/html-links.html</td>\n",
       "      <td>HTML 链接</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>https://www.runoob.com/html/html-head.html</td>\n",
       "      <td>HTML 头部</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>https://www.runoob.com/html/html-css.html</td>\n",
       "      <td>HTML CSS</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>https://www.runoob.com/html/html-images.html</td>\n",
       "      <td>HTML 图像</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>https://www.runoob.com/html/html-tables.html</td>\n",
       "      <td>HTML 表格</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>https://www.runoob.com/html/html-lists.html</td>\n",
       "      <td>HTML 列表</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>https://www.runoob.com/html/html-blocks.html</td>\n",
       "      <td>HTML 区块</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>https://www.runoob.com/html/html-layouts.html</td>\n",
       "      <td>HTML 布局</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>https://www.runoob.com/html/html-forms.html</td>\n",
       "      <td>HTML 表单</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>https://www.runoob.com/html/html-iframes.html</td>\n",
       "      <td>HTML 框架</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>https://www.runoob.com/html/html-colors.html</td>\n",
       "      <td>HTML 颜色</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>https://www.runoob.com/html/html-colornames.html</td>\n",
       "      <td>HTML 颜色名</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>https://www.runoob.com/html/html-colorvalues.html</td>\n",
       "      <td>HTML 颜色值</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>https://www.runoob.com/html/html-scripts.html</td>\n",
       "      <td>HTML 脚本</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>https://www.runoob.com/html/html-entities.html</td>\n",
       "      <td>HTML 字符实体</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>https://www.runoob.com/html/html-url.html</td>\n",
       "      <td>HTML URL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>https://www.runoob.com/html/html-quicklist.html</td>\n",
       "      <td>HTML 速查列表</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>https://www.runoob.com/html/html-tag-name.html</td>\n",
       "      <td>HTML 标签简写及全称</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>https://www.runoob.com/html/html-summary.html</td>\n",
       "      <td>HTML 总结</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>https://www.runoob.com/html/html-xhtml.html</td>\n",
       "      <td>XHTML 简介</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>https://www.runoob.com/html/html5-intro.html</td>\n",
       "      <td>HTML5 教程</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>44</th>\n",
       "      <td>https://www.runoob.com/html5-web-sql.html</td>\n",
       "      <td>HTML5 Web SQL</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>45</th>\n",
       "      <td>https://www.runoob.com/html/html5-app-cache.html</td>\n",
       "      <td>HTML5 应用程序缓存</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>46</th>\n",
       "      <td>https://www.runoob.com/html/html5-webworkers.html</td>\n",
       "      <td>HTML5 Web Workers</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>47</th>\n",
       "      <td>https://www.runoob.com/html/html5-serversentev...</td>\n",
       "      <td>HTML5 SSE</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>48</th>\n",
       "      <td>https://www.runoob.com/html/html5-websocket.html</td>\n",
       "      <td>HTML5 WebSocket</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>49</th>\n",
       "      <td>https://www.runoob.com/quiz/html5-quiz.html</td>\n",
       "      <td>HTML5 测验</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50</th>\n",
       "      <td>https://www.runoob.com/html/html5-syntax.html</td>\n",
       "      <td>HTML(5) 代码规范</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>51</th>\n",
       "      <td>https://www.runoob.com/html/html-media.html</td>\n",
       "      <td>HTML 媒体(Media)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>52</th>\n",
       "      <td>https://www.runoob.com/html/html-object.html</td>\n",
       "      <td>HTML 插件</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>53</th>\n",
       "      <td>https://www.runoob.com/html/html-sounds.html</td>\n",
       "      <td>HTML 音频(Audio)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>54</th>\n",
       "      <td>https://www.runoob.com/html/html-videos.html</td>\n",
       "      <td>HTML 视频（Video）播放</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>55</th>\n",
       "      <td>https://www.runoob.com/html/html-examples.html</td>\n",
       "      <td>HTML 实例</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>https://www.runoob.com/tags/html-reference.html</td>\n",
       "      <td>HTML 标签列表(字母排序)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>https://www.runoob.com/tags/ref-byfunc.html</td>\n",
       "      <td>HTML 标签列表（功能排序）</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>https://www.runoob.com/tags/ref-standardattrib...</td>\n",
       "      <td>HTML 属性</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>https://www.runoob.com/tags/ref-eventattribute...</td>\n",
       "      <td>HTML 事件</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>https://www.runoob.com/tags/ref-canvas.html</td>\n",
       "      <td>HTML 画布</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>61</th>\n",
       "      <td>https://www.runoob.com/tags/ref-av-dom.html</td>\n",
       "      <td>HTML 音频/视频</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>62</th>\n",
       "      <td>https://www.runoob.com/tags/html-elementsdocty...</td>\n",
       "      <td>HTML 有效DOCTYPES</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>63</th>\n",
       "      <td>https://www.runoob.com/tags/html-colorname.html</td>\n",
       "      <td>HTML 颜色名</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>64</th>\n",
       "      <td>https://www.runoob.com/tags/html-colorpicker.html</td>\n",
       "      <td>HTML 拾色器</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>65</th>\n",
       "      <td>https://www.runoob.com/charsets/html-charsets....</td>\n",
       "      <td>HTML 字符集</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>66</th>\n",
       "      <td>https://www.runoob.com/tags/html-ascii.html</td>\n",
       "      <td>HTML ASCII</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>67</th>\n",
       "      <td>https://www.runoob.com/tags/ref-entities.html</td>\n",
       "      <td>HTML ISO-8859-1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>68</th>\n",
       "      <td>https://www.runoob.com/tags/html-symbols.html</td>\n",
       "      <td>HTML 符号</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>69</th>\n",
       "      <td>https://www.runoob.com/tags/html-urlencode.html</td>\n",
       "      <td>HTML URL 编码</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>70</th>\n",
       "      <td>https://www.runoob.com/tags/html-language-code...</td>\n",
       "      <td>HTML 语言代码</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>71</th>\n",
       "      <td>https://www.runoob.com/tags/html-httpmessages....</td>\n",
       "      <td>HTTP 消息</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>72</th>\n",
       "      <td>https://www.runoob.com/tags/html-httpmethods.html</td>\n",
       "      <td>HTTP 方法</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>73</th>\n",
       "      <td>https://www.runoob.com/tags/html-keyboardshort...</td>\n",
       "      <td>键盘快捷键</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>74 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                                                 href               name\n",
       "0      https://www.runoob.com/html/html-tutorial.html            HTML 教程\n",
       "1         https://www.runoob.com/html/html-intro.html            HTML 简介\n",
       "2       https://www.runoob.com/html/html-editors.html           HTML 编辑器\n",
       "3         https://www.runoob.com/html/html-basic.html            HTML 基础\n",
       "4      https://www.runoob.com/html/html-elements.html            HTML 元素\n",
       "5    https://www.runoob.com/html/html-attributes.html            HTML 属性\n",
       "6      https://www.runoob.com/html/html-headings.html            HTML 标题\n",
       "7    https://www.runoob.com/html/html-paragraphs.html            HTML 段落\n",
       "8    https://www.runoob.com/html/html-formatting.html         HTML 文本格式化\n",
       "9         https://www.runoob.com/html/html-links.html            HTML 链接\n",
       "10         https://www.runoob.com/html/html-head.html            HTML 头部\n",
       "11          https://www.runoob.com/html/html-css.html           HTML CSS\n",
       "12       https://www.runoob.com/html/html-images.html            HTML 图像\n",
       "13       https://www.runoob.com/html/html-tables.html            HTML 表格\n",
       "14        https://www.runoob.com/html/html-lists.html            HTML 列表\n",
       "15       https://www.runoob.com/html/html-blocks.html            HTML 区块\n",
       "16      https://www.runoob.com/html/html-layouts.html            HTML 布局\n",
       "17        https://www.runoob.com/html/html-forms.html            HTML 表单\n",
       "18      https://www.runoob.com/html/html-iframes.html            HTML 框架\n",
       "19       https://www.runoob.com/html/html-colors.html            HTML 颜色\n",
       "20   https://www.runoob.com/html/html-colornames.html           HTML 颜色名\n",
       "21  https://www.runoob.com/html/html-colorvalues.html           HTML 颜色值\n",
       "22      https://www.runoob.com/html/html-scripts.html            HTML 脚本\n",
       "23     https://www.runoob.com/html/html-entities.html          HTML 字符实体\n",
       "24          https://www.runoob.com/html/html-url.html           HTML URL\n",
       "25    https://www.runoob.com/html/html-quicklist.html          HTML 速查列表\n",
       "26     https://www.runoob.com/html/html-tag-name.html       HTML 标签简写及全称\n",
       "27      https://www.runoob.com/html/html-summary.html            HTML 总结\n",
       "28        https://www.runoob.com/html/html-xhtml.html           XHTML 简介\n",
       "29       https://www.runoob.com/html/html5-intro.html           HTML5 教程\n",
       "..                                                ...                ...\n",
       "44          https://www.runoob.com/html5-web-sql.html     HTML5 Web SQL \n",
       "45   https://www.runoob.com/html/html5-app-cache.html       HTML5 应用程序缓存\n",
       "46  https://www.runoob.com/html/html5-webworkers.html  HTML5 Web Workers\n",
       "47  https://www.runoob.com/html/html5-serversentev...          HTML5 SSE\n",
       "48   https://www.runoob.com/html/html5-websocket.html   HTML5 WebSocket \n",
       "49        https://www.runoob.com/quiz/html5-quiz.html          HTML5 测验 \n",
       "50      https://www.runoob.com/html/html5-syntax.html      HTML(5) 代码规范 \n",
       "51        https://www.runoob.com/html/html-media.html     HTML 媒体(Media)\n",
       "52       https://www.runoob.com/html/html-object.html            HTML 插件\n",
       "53       https://www.runoob.com/html/html-sounds.html     HTML 音频(Audio)\n",
       "54       https://www.runoob.com/html/html-videos.html   HTML 视频（Video）播放\n",
       "55     https://www.runoob.com/html/html-examples.html            HTML 实例\n",
       "56    https://www.runoob.com/tags/html-reference.html    HTML 标签列表(字母排序)\n",
       "57        https://www.runoob.com/tags/ref-byfunc.html    HTML 标签列表（功能排序）\n",
       "58  https://www.runoob.com/tags/ref-standardattrib...            HTML 属性\n",
       "59  https://www.runoob.com/tags/ref-eventattribute...            HTML 事件\n",
       "60        https://www.runoob.com/tags/ref-canvas.html            HTML 画布\n",
       "61        https://www.runoob.com/tags/ref-av-dom.html         HTML 音频/视频\n",
       "62  https://www.runoob.com/tags/html-elementsdocty...    HTML 有效DOCTYPES\n",
       "63    https://www.runoob.com/tags/html-colorname.html           HTML 颜色名\n",
       "64  https://www.runoob.com/tags/html-colorpicker.html           HTML 拾色器\n",
       "65  https://www.runoob.com/charsets/html-charsets....           HTML 字符集\n",
       "66        https://www.runoob.com/tags/html-ascii.html         HTML ASCII\n",
       "67      https://www.runoob.com/tags/ref-entities.html    HTML ISO-8859-1\n",
       "68      https://www.runoob.com/tags/html-symbols.html            HTML 符号\n",
       "69    https://www.runoob.com/tags/html-urlencode.html        HTML URL 编码\n",
       "70  https://www.runoob.com/tags/html-language-code...          HTML 语言代码\n",
       "71  https://www.runoob.com/tags/html-httpmessages....            HTTP 消息\n",
       "72  https://www.runoob.com/tags/html-httpmethods.html            HTTP 方法\n",
       "73  https://www.runoob.com/tags/html-keyboardshort...              键盘快捷键\n",
       "\n",
       "[74 rows x 2 columns]"
      ]
     },
     "execution_count": 82,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 83,
   "metadata": {},
   "outputs": [],
   "source": [
    "writer = pd.ExcelWriter('菜鸟.xlsx')\n",
    "\n",
    "df.to_excel(writer,sheet_name='标签')\n",
    "\n",
    "writer.save()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
